################################################
# Monte Carlos for:
#
# Warner, Zach, Garrett N. Vande Kamp, and Soren Jordan.
# ``Conditional Relationships in Dynamic Models.'' 
#  Political Science Research & Methods
#
# This generates the file: WVKJ-MC-simulations.csv
#
################################################


rm(list = ls())
library(foreign); library(tidyverse); library(ggplot2); library(plm); library(lmtest); library(sandwich); library(fpp); library(caret)
library(Ecdat); library(plyr); library(boot); library(MASS); library(grid); library(gridExtra); library(Rcpp)
library(RColorBrewer); library(extrafont); library(fontcm); library(showtext); loadfonts()
library(doMC); library(foreach); registerDoMC(4) #parallelization


#### Utilities
mypal <- c("#000000", "#B78E47", "#b70101")
completeFun <- function(data, desiredCols) {
  completeVec <- complete.cases(data[, desiredCols])
  return(data[completeVec, ])
}

cl <- function(dat,fm, cluster) {
  library(sandwich, quietly = TRUE)
  library(lmtest, quietly = TRUE)
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- fm$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
  coeftest(fm, vcovCL) 
}

multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  plots <- c(list(...), plotlist)
  numPlots = length(plots)
  if (is.null(layout)) {
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  if (numPlots==1) {
    print(plots[[1]])
  } else {
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    for (i in 1:numPlots) {
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

# R^2 from glm objects
r2.corr <- function(m) {
  lmfit <-  lm(model.response(model.frame(m)) ~ fitted(m))
  summary(lmfit)$r.squared
} 
# for getting the mode of a variable
Mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}





# These parameters are calso called in the sim analysis below the data creation.
# everything stationary with rho = 0.5
rho.x <- c(0.1, 0.5, 0.9) 
rho.z <- c(0.1, 0.5, 0.9) 
rho.y <- c(0.1, 0.5, 0.9) 
nobs <- c(16, 31, 51, 101, 501)  # add 1 because we'll drop the first value due to lagged values being NA

#set up the master dataframes for conditions to vary
exper <- expand.grid(nobs = nobs,
                     rho.x = rho.x,
                     rho.z = rho.z,
                     rho.y = rho.y)

# true values not varying
alpha.0 = 0   # intercept
alpha.1 = .5  # l.y
beta.0 = -.3   # x
beta.1 = .3   # l.x
beta.2 = .2  # z
beta.3 = -.7  # l.z
beta.4 = .5  # x_z
beta.5 = -.25  # l.x_z
beta.6 = .1  # x_l.z
beta.7 = -.4  # l.x_l.z

# 500 reps per condition
reps <- 500









##### MONTE CARLO SIMULATIONS  -- these take 24 hours #####

set.seed(13262357)

exper.list <- as.list(NULL)
exper.noint.list <- as.list(NULL)
exper.restricted.list <- as.list(NULL)

# store estimates and such from model with no interaction
reps.noint.df <- data.frame(matrix(ncol = 37, nrow = reps))
names(reps.noint.df) <- c(paste0("alpha.", 0:1), paste0("beta.", 0:3), paste0("p.alpha.", 0:1), paste0("p.beta.", 0:3), paste0("ss.alpha.", 0:1),
							paste0("ss.beta.", 0:3), paste0("cov.alpha.", 0:1), paste0("cov.beta.", 0:3), paste0("var.alpha.", 0:1), 
							paste0("var.beta.", 0:3), paste0("bias.alpha.", 0:1), paste0("bias.beta.", 0:3), "RMSE")

# store estimates and such from model with only present-period interaction
reps.restricted.df <- data.frame(matrix(ncol = 43, nrow = reps))
names(reps.restricted.df) <- c(paste0("alpha.", 0:1), paste0("beta.", 0:4), paste0("p.alpha.", 0:1), paste0("p.beta.", 0:4), paste0("ss.alpha.", 0:1),
							paste0("ss.beta.", 0:4), paste0("cov.alpha.", 0:1), paste0("cov.beta.", 0:4), paste0("var.alpha.", 0:1), 
							paste0("var.beta.", 0:4), paste0("bias.alpha.", 0:1), paste0("bias.beta.", 0:4), "RMSE")

# store the estimates and such from the full model
reps.df <- data.frame(matrix(ncol = 61, nrow = reps))
names(reps.df) <- c(paste0("alpha.", 0:1), paste0("beta.", 0:7), paste0("p.alpha.", 0:1), paste0("p.beta.", 0:7), paste0("ss.alpha.", 0:1),
							paste0("ss.beta.", 0:7), paste0("cov.alpha.", 0:1), paste0("cov.beta.", 0:7), paste0("var.alpha.", 0:1), 
							paste0("var.beta.", 0:7), paste0("bias.alpha.", 0:1), paste0("bias.beta.", 0:7), "RMSE")

### Run the loop for case when any relationship is spurious
for(k in 1:nrow(exper)){
# Or, for parallel	
# foreach(k = 1:nrow(exper)) %dopar% {

  message(paste("experiment", k, "of", nrow(exper), sep = " "))
  
  # old, patch -- 50% subset rather than 20%, still 50 obs
  rmse.vec <- sample(seq(1:reps), reps/2, replace = FALSE)
  #rmse.vec <- sample(seq(1:reps), reps/5, replace = F)
  
  pb <- txtProgressBar(min = 0, max = reps, style = 3)
  for(j in 1:reps){
    
    ### data prep
    x <- arima.sim(list(ar = exper$rho.x[k]), exper$nobs[k], n.start = 100)
    z <- arima.sim(list(ar = exper$rho.z[k]), exper$nobs[k], n.start = 100)
    df <- data.frame(cbind(x, z))
    # create lags
    df <- ddply(df, .(), transform, l.x = c(NA, x[-length(x)]))
    df <- ddply(df, .(), transform, l.z = c(NA, z[-length(z)]))
    # create interactions
    df$xz <- df$x*df$z
    df$l.x_z <- df$l.x*df$z
    df$x_l.z <- df$x*df$l.z
    df$l.x_l.z <- df$l.x*df$l.z

    # create DV -- spurious
    df$y <- arima.sim(list(ar = exper$rho.y[k]), exper$nobs[k], n.start=100)
    # create lag DV
    df <- ddply(df, .(), transform, l.y = c(NA, y[-length(y)]))
    # drop first obs so no NA
    df <- df[-1, ]
    
    ### estimate models
    nointmod <- glm(y ~ l.y + x + l.x + z + l.z, data = df)
    restrictedmod <- glm(y ~ l.y + x + l.x + z + l.z + xz, data = df)
    genmod <- glm(y ~ l.y + x + l.x + z + l.z + xz + l.x_z + x_l.z + l.x_l.z,
                  data = df)
    
    ### save estimates and p values
    reps.noint.df[j, 1:12] <- matrix(c(summary(nointmod)$coeff[, 1],
                                summary(nointmod)$coeff[, 4]), nrow = 1)
    reps.restricted.df[j, 1:14] <- matrix( c(summary(restrictedmod)$coeff[, 1],
                                     summary(restrictedmod)$coeff[, 4]), nrow = 1)
    reps.df[j, 1:20] <- matrix( c(summary(genmod)$coeff[, 1],
                          summary(genmod)$coeff[, 4]), nrow = 1)
    
    ### save coverage -- includes Mult Comp. correction
    ci.noint <- data.frame(suppressMessages(confint(nointmod)))
    names(ci.noint) <- c('lo', 'hi')
    ci.restricted <- data.frame(suppressMessages(confint(restrictedmod)))
    names(ci.restricted) <- c('lo', 'hi')
    ci.gen <- data.frame(suppressMessages(confint(genmod)))
    names(ci.gen) <- c('lo', 'hi')
    ci.noint$true <- c(0, exper$rho.y[k], 0, 0, 0, 0)                 # no relationships in this case
    ci.restricted$true <- c(0, exper$rho.y[k], 0, 0, 0, 0, 0)          # no relationships in this case
    ci.gen$true <- c(0, exper$rho.y[k], 0, 0, 0, 0, 0, 0, 0, 0)           # no relationships in this case
    ci.noint$cov <- ifelse(ci.noint$lo < ci.noint$true &
                             ci.noint$hi > ci.noint$true, 1, 0)
    ci.restricted$cov <- ifelse(ci.restricted$lo < ci.restricted$true &
                                  ci.restricted$hi > ci.restricted$true, 1, 0)
    ci.gen$cov <- ifelse(ci.gen$lo < ci.gen$true & ci.gen$hi > ci.gen$true, 1, 0)
    reps.noint.df[j, c(19:24)] <- matrix(ci.noint$cov, nrow = 1)
    reps.restricted.df[j, c(22:28)] <- matrix(ci.restricted$cov, nrow = 1)
    reps.df[j, c(31:40)] <- matrix(ci.gen$cov, nrow = 1)
    
    ### calculate RMSE from 5-fold cross-val -- for computational reasons, take a random 50% subset
    if((j %in% rmse.vec) == TRUE){
      tc <- trainControl("cv", 5)
      reps.noint.df[j, ncol(reps.noint.df)] <-
        train(y ~ l.y + x + l.x + z + l.z,
              data = df, method = "glm", trControl = tc)$results$RMSE
      reps.restricted.df[j,ncol(reps.restricted.df)] <-
        train(y ~ l.y + x + l.x + z + l.z + xz,
              data = df, method = "glm", trControl = tc)$results$RMSE
      reps.df[j, ncol(reps.df)] <-
        train(y ~ l.y + x + l.x + z + l.z + xz + l.x_z + x_l.z + l.x_l.z,
              data = df, method = "glm", trControl = tc)$results$RMSE
    }
    
    ### update progress bar
    setTxtProgressBar(pb, j)
  }
  
  # Still in the k loop over the experimental parameters
  
  ###significance of the ECM stat, E+M critical values
  # SJ: I am not double-checking these critical values, which come from Warner alone.
  
  reps.noint.df[, 14] <- ifelse(abs(summary(nointmod)$coeff[2, 3]) >
                                  abs(-3.33613 + ((-6.1101)/exper$nobs[k]) +
                                        ((-6.823)/(exper$nobs[k])^2)), 1, 0) #for 2 var, constant, 5%
  reps.restricted.df[, 16] <- ifelse(abs(summary(restrictedmod)$coeff[2, 3]) >
                                       abs(-3.74066 + ((-8.5631)/exper$nobs[k]) +
                                             ((-10.852)/(exper$nobs[k])^2) +
                                             ((27.982)/(exper$nobs[k])^3)), 1, 0) #for 3 var, constant, 5%
  reps.df[, 22] <- ifelse(abs(summary(genmod)$coeff[2, 3]) >
                            abs(-3.74066 + ((-8.5631)/exper$nobs[k]) +
                                  ((-10.852)/(exper$nobs[k])^2) +
                                  ((27.982)/(exper$nobs[k])^3)), 1, 0) #for 3 var, constant, 5%
  
  ### save statistical significance for other variables
  for(i in c(13, 15:18)){
    reps.noint.df[,i] <- ifelse(reps.noint.df[, (i - 6)] < .05, 1, 0)
  }
  for(i in c(15, 17:21)){
    reps.restricted.df[,i] <- ifelse(reps.restricted.df[, (i - 7)] < .05, 1, 0)
  }
  for(i in c(21, 23:30)){
    reps.df[,i] <- ifelse(reps.df[, (i - 10)] < .05, 1, 0)
  }
  
  ### save MC variance -- a little inefficient, but the time cost is small
  for(i in 25:30){
    reps.noint.df[, i] <- rep(var(reps.noint.df[, (i - 24)]), reps)
  }
  for(i in 29:35){
    reps.restricted.df[, i] <- rep(var(reps.restricted.df[, (i - 28)]), reps)
  }
  for(i in 41:50){
    reps.df[, i] <- rep(var(reps.df[, (i - 40)]), reps)
  }
  
  ### save bias
  reps.noint.df$bias.alpha.0 <- abs(reps.noint.df$alpha.0 - alpha.0)
  reps.noint.df$bias.alpha.1 <- abs(reps.noint.df$alpha.1 - alpha.1)
  reps.noint.df$bias.beta.0 <- abs(reps.noint.df$beta.0 - beta.0)
  reps.noint.df$bias.beta.1 <- abs(reps.noint.df$beta.1 - beta.1)
  reps.noint.df$bias.beta.2 <- abs(reps.noint.df$beta.2 - beta.2)
  reps.noint.df$bias.beta.3 <- abs(reps.noint.df$beta.3 - beta.3)
  reps.restricted.df$bias.alpha.0 <- abs(reps.restricted.df$alpha.0 - alpha.0)
  reps.restricted.df$bias.alpha.1 <- abs(reps.restricted.df$alpha.1 - alpha.1)
  reps.restricted.df$bias.beta.0 <- abs(reps.restricted.df$beta.0 - beta.0)
  reps.restricted.df$bias.beta.1 <- abs(reps.restricted.df$beta.1 - beta.1)
  reps.restricted.df$bias.beta.2 <- abs(reps.restricted.df$beta.2 - beta.2)
  reps.restricted.df$bias.beta.3 <- abs(reps.restricted.df$beta.3 - beta.3)
  reps.restricted.df$bias.beta.4 <- abs(reps.restricted.df$beta.4 - beta.4)
  reps.df$bias.alpha.0 <- abs(reps.df$alpha.0 - alpha.0)
  reps.df$bias.alpha.1 <- abs(reps.df$alpha.1 - alpha.1)
  reps.df$bias.beta.0 <- abs(reps.df$beta.0 - beta.0)
  reps.df$bias.beta.1 <- abs(reps.df$beta.1 - beta.1)
  reps.df$bias.beta.2 <- abs(reps.df$beta.2 - beta.2)
  reps.df$bias.beta.3 <- abs(reps.df$beta.3 - beta.3)
  reps.df$bias.beta.4 <- abs(reps.df$beta.4 - beta.4)
  reps.df$bias.beta.5 <- abs(reps.df$beta.5 - beta.5)
  reps.df$bias.beta.6 <- abs(reps.df$beta.6 - beta.6)
  reps.df$bias.beta.7 <- abs(reps.df$beta.7 - beta.7)
  
  # export data
  exper.list[[k]] <- reps.df
  exper.noint.list[[k]] <- reps.noint.df
  exper.restricted.list[[k]] <- reps.restricted.df
  close(pb) #error due to length of 0
}


# merge into final df
df <- data.frame(matrix(nrow=0,ncol=10))
colnames(df) <- c("lo", "hi", "mean", "median", "var", "nobs", "rho.x", "rho.z", "rho.y", "model")
# quantiles -- full model
for(i in 1:nrow(exper)){
  tmp <- exper.list[[i]]
  tmpdf <- data.frame(t(apply(tmp, 2, quantile,c(.025,.975), na.rm=T))); colnames(tmpdf) <- c("lo","hi")
  tmpdf$mean <- apply(tmp, 2, mean, na.rm=TRUE)
  tmpdf$median <- apply(tmp, 2, median, na.rm=TRUE)
  tmpdf$var <- rownames(tmpdf); rownames(tmpdf) <- NULL
  tmpdf$nobs <- rep(exper$nobs[i],nrow(tmpdf))
  tmpdf$rho.x <- rep(exper$rho.x[i],nrow(tmpdf))
  tmpdf$rho.z <- rep(exper$rho.z[i],nrow(tmpdf))
  tmpdf$rho.y <- rep(exper$rho.y[i],nrow(tmpdf))
  tmpdf$model <- rep("full",nrow(tmpdf))
  for(j in 1:ncol(tmpdf)){
    tmpdf[,j] <- as.character(tmpdf[,j])
  }
  df <- rbind(df,tmpdf)
}

# quantiles -- no interaction model
for(i in 1:nrow(exper)){
  tmp <- exper.noint.list[[i]]
  tmpdf <- data.frame(t(apply(tmp, 2, quantile,c(.025,.975), na.rm=T))); colnames(tmpdf) <- c("lo","hi")
  tmpdf$mean <- apply(tmp, 2, mean, na.rm=TRUE)
  tmpdf$median <- apply(tmp, 2, median, na.rm=TRUE)
  tmpdf$var <- rownames(tmpdf); rownames(tmpdf) <- NULL
  tmpdf$nobs <- rep(exper$nobs[i],nrow(tmpdf))
  tmpdf$rho.x <- rep(exper$rho.x[i],nrow(tmpdf))
  tmpdf$rho.z <- rep(exper$rho.z[i],nrow(tmpdf))
  tmpdf$rho.y <- rep(exper$rho.y[i],nrow(tmpdf))
  tmpdf$model <- rep("noint",nrow(tmpdf))
  for(j in 1:ncol(tmpdf)){
    tmpdf[,j] <- as.character(tmpdf[,j])
  }
  df <- rbind(df,tmpdf)
}

# quantiles -- restricted model
for(i in 1:nrow(exper)){
  tmp <- exper.restricted.list[[i]]
  tmpdf <- data.frame(t(apply(tmp, 2, quantile,c(.025,.975), na.rm=T))); colnames(tmpdf) <- c("lo","hi")
  tmpdf$mean <- apply(tmp, 2, mean, na.rm=TRUE)
  tmpdf$median <- apply(tmp, 2, median, na.rm=TRUE)
  tmpdf$var <- rownames(tmpdf); rownames(tmpdf) <- NULL
  tmpdf$nobs <- rep(exper$nobs[i],nrow(tmpdf))
  tmpdf$rho.x <- rep(exper$rho.x[i],nrow(tmpdf))
  tmpdf$rho.z <- rep(exper$rho.z[i],nrow(tmpdf))
  tmpdf$rho.y <- rep(exper$rho.y[i],nrow(tmpdf))
  tmpdf$model <- rep("restricted",nrow(tmpdf))
  for(j in 1:ncol(tmpdf)){
    tmpdf[,j] <- as.character(tmpdf[,j])
  }
  df <- rbind(df,tmpdf)
}
# finally, store for later
df_spurious <- df







#reset the master info
exper <- expand.grid(nobs = nobs,
                     rho.x = rho.x,
                     rho.z = rho.z,
                     rho.y = rho.y)
exper.list <- as.list(NULL)
exper.noint.list <- as.list(NULL)
exper.restricted.list <- as.list(NULL)

# store estimates and such from model with no interaction
reps.noint.df <- data.frame(matrix(ncol = 37, nrow = reps))
names(reps.noint.df) <- c(paste0("alpha.", 0:1), paste0("beta.", 0:3), paste0("p.alpha.", 0:1), paste0("p.beta.", 0:3), paste0("ss.alpha.", 0:1),
							paste0("ss.beta.", 0:3), paste0("cov.alpha.", 0:1), paste0("cov.beta.", 0:3), paste0("var.alpha.", 0:1), 
							paste0("var.beta.", 0:3), paste0("bias.alpha.", 0:1), paste0("bias.beta.", 0:3), "RMSE")

# store estimates and such from model with only present-period interaction
reps.restricted.df <- data.frame(matrix(ncol = 43, nrow = reps))
names(reps.restricted.df) <- c(paste0("alpha.", 0:1), paste0("beta.", 0:4), paste0("p.alpha.", 0:1), paste0("p.beta.", 0:4), paste0("ss.alpha.", 0:1),
							paste0("ss.beta.", 0:4), paste0("cov.alpha.", 0:1), paste0("cov.beta.", 0:4), paste0("var.alpha.", 0:1), 
							paste0("var.beta.", 0:4), paste0("bias.alpha.", 0:1), paste0("bias.beta.", 0:4), "RMSE")

# store the estimates and such from the full model
reps.df <- data.frame(matrix(ncol = 61, nrow = reps))
names(reps.df) <- c(paste0("alpha.", 0:1), paste0("beta.", 0:7), paste0("p.alpha.", 0:1), paste0("p.beta.", 0:7), paste0("ss.alpha.", 0:1),
							paste0("ss.beta.", 0:7), paste0("cov.alpha.", 0:1), paste0("cov.beta.", 0:7), paste0("var.alpha.", 0:1), 
							paste0("var.beta.", 0:7), paste0("bias.alpha.", 0:1), paste0("bias.beta.", 0:7), "RMSE")

### Run the loop for case when there's no interaction present
for(k in 1:nrow(exper)){
# Or, for parallel	
# foreach(k = 1:nrow(exper)) %dopar% {
	
    message(paste("experiment", k, "of", nrow(exper), sep = " "))
    
    # old, patch -- 50% subset rather than 20%, still 50 obs
    rmse.vec <- sample(seq(1:reps), reps/2, replace = FALSE)
    #rmse.vec <- sample(seq(1:reps), reps/5, replace = FALSE)
    
    pb <- txtProgressBar(min = 0, max = reps, style = 3)
    for(j in 1:reps){
      
      ### set up the data
      x <- arima.sim(list(ar = exper$rho.x[k]), exper$nobs[k], n.start = 100)
      z <- arima.sim(list(ar = exper$rho.z[k]), exper$nobs[k], n.start = 100)
      df <- data.frame(cbind(x, z))
      # create lags
      df <- ddply(df, .(), transform, l.x = c(NA, x[-length(x)]))
      df <- ddply(df, .(), transform, l.z = c(NA, z[-length(z)]))
      # create interactions
      df$xz <- df$x*df$z
      df$l.x_z <- df$l.x*df$z
      df$x_l.z <- df$x*df$l.z
      df$l.x_l.z <- df$l.x*df$l.z
      # create DV -- no interaction term
      df$y <- rep(NA, nrow(df))
      df$y[1] <- rnorm(1) #randomly start
      for(i in 2:nrow(df)){
        df$y[i] <- alpha.0 + alpha.1*df$y[i-1] + beta.0*df$x[i] +
          beta.1*df$l.x[i] + beta.2*df$z[i] + beta.3*df$l.z[i] + rnorm(1)
      }
      # create lag DV
      df <- ddply(df, .(), transform, l.y = c(NA, y[-length(y)]))
      # drop first obs so no NA
      df <- df[-1, ]
      
      ### estimate the models
      nointmod <- glm(y ~ l.y + x + l.x + z + l.z, data = df)
      restrictedmod <- glm(y ~ l.y + x + l.x + z + l.z + xz, data = df)
      genmod <- glm(y ~ l.y + x + l.x + z + l.z + xz + l.x_z + x_l.z + l.x_l.z,
                    data = df)
      
      ### save estimates and p values
      reps.noint.df[j, 1:12] <- matrix(c(summary(nointmod)$coeff[, 1],
                                summary(nointmod)$coeff[, 4]), nrow = 1)
      reps.restricted.df[j, 1:14] <- matrix( c(summary(restrictedmod)$coeff[, 1],
                                     summary(restrictedmod)$coeff[, 4]), nrow = 1)
      reps.df[j, 1:20] <- matrix(c(summary(genmod)$coeff[, 1],
                          summary(genmod)$coeff[, 4]), nrow = 1)
      
      ### save coverage -- includes Mult Comp. correction
      ci.noint <- data.frame(suppressMessages(confint(nointmod)))
      names(ci.noint) <- c('lo', 'hi')
      ci.restricted <- data.frame(suppressMessages(confint(restrictedmod)))
      names(ci.restricted) <- c('lo', 'hi')
      ci.gen <- data.frame(suppressMessages(confint(genmod)))
      names(ci.gen) <- c('lo', 'hi')
      ci.noint$true <- c(alpha.0, alpha.1, beta.0, beta.1, beta.2, beta.3)
      ci.restricted$true <- c(alpha.0, alpha.1, beta.0, beta.1, beta.2, beta.3, 0) # b4 = 0 in this sim
      ci.gen$true <- c(alpha.0, alpha.1, beta.0, beta.1, beta.2, beta.3, 0, 0, 0, 0) # b4 = b5 = b6 = b7 = 0 in this sim
      ci.noint$cov <- ifelse(ci.noint$lo < ci.noint$true &
                               ci.noint$hi > ci.noint$true, 1, 0)
      ci.restricted$cov <- ifelse(ci.restricted$lo < ci.restricted$true &
                                    ci.restricted$hi > ci.restricted$true, 1, 0)
      ci.gen$cov <- ifelse(ci.gen$lo < ci.gen$true &
                             ci.gen$hi > ci.gen$true, 1, 0)
      reps.noint.df[j, c(19:24)] <- matrix(ci.noint$cov, nrow = 1)
      reps.restricted.df[j, c(22:28)] <- matrix(ci.restricted$cov, nrow = 1)
      reps.df[j, c(31:40)] <- matrix(ci.gen$cov, nrow = 1)
  
      ### calculate RMSE from 5-fold cross-val -- for computational reasons, take a random 50% subset
      if((j %in% rmse.vec) == TRUE){
        tc <- trainControl("cv", 5)
        reps.noint.df[j, ncol(reps.noint.df)] <-
          train(y ~ l.y + x + l.x + z + l.z,
                data = df, method = "glm", trControl = tc)$results$RMSE
        reps.restricted.df[j,ncol(reps.restricted.df)] <-
          train(y ~ l.y + x + l.x + z + l.z + xz,
                data = df, method = "glm", trControl = tc)$results$RMSE
        reps.df[j,ncol(reps.df)] <-
          train(y ~ l.y + x + l.x + z + l.z + xz + l.x_z + x_l.z + l.x_l.z,
                data = df, method = "glm", trControl = tc)$results$RMSE
      }
      # update progress bar
      setTxtProgressBar(pb, j)
    }
    
    ### save significance of the ECM stat, E+M critical values
    reps.noint.df[, 14] <- ifelse(abs(summary(nointmod)$coeff[2, 3]) >
                                    abs(-3.33613 + ((-6.1101)/exper$nobs[k]) +
                                          ((-6.823)/(exper$nobs[k])^2)), 1, 0) #for 2 var, constant, 5%
    reps.restricted.df[, 16] <- ifelse(abs(summary(restrictedmod)$coeff[2, 3]) >
                                         abs(-3.74066 + ((-8.5631)/exper$nobs[k]) +
                                               ((-10.852)/(exper$nobs[k])^2) +
                                               ((27.982)/(exper$nobs[k])^3)), 1, 0) # for 3 var, constant, 5%
    reps.df[, 22] <- ifelse(abs(summary(genmod)$coeff[2, 3]) >
                              abs(-3.74066 + ((-8.5631)/exper$nobs[k]) +
                                    ((-10.852)/(exper$nobs[k])^2) +
                                    ((27.982)/(exper$nobs[k])^3)), 1, 0) #for 3 var, constant, 5%
    ### save whether stat. sig., except alpha_1
    for(i in c(13, 15:18)){
      reps.noint.df[, i] <- ifelse(reps.noint.df[, (i - 6)] < .05, 1, 0)
    }
    for(i in c(15, 17:21)){
      reps.restricted.df[, i] <- ifelse(reps.restricted.df[, (i - 7)] < .05, 1, 0)
    }
    for(i in c(21, 23:30)){
      reps.df[, i] <- ifelse(reps.df[, (i - 10)] < .05, 1, 0)
    }
    
    ### save MC variance
    for(i in 25:30){
      reps.noint.df[, i] <- rep(var(reps.noint.df[, (i - 24)]), reps)
    }
    for(i in 29:35){
      reps.restricted.df[, i] <- rep(var(reps.restricted.df[, (i - 28)]), reps)
    }
    for(i in 41:50){
      reps.df[, i] <- rep(var(reps.df[, (i - 40)]), reps)
    }
    
    ### save bias
    reps.noint.df$bias.alpha.0 <- abs(reps.noint.df$alpha.0 - alpha.0)
    reps.noint.df$bias.alpha.1 <- abs(reps.noint.df$alpha.1 - alpha.1)
    reps.noint.df$bias.beta.0 <- abs(reps.noint.df$beta.0 - beta.0)
    reps.noint.df$bias.beta.1 <- abs(reps.noint.df$beta.1 - beta.1)
    reps.noint.df$bias.beta.2 <- abs(reps.noint.df$beta.2 - beta.2)
    reps.noint.df$bias.beta.3 <- abs(reps.noint.df$beta.3 - beta.3)
    reps.restricted.df$bias.alpha.0 <- abs(reps.restricted.df$alpha.0 - alpha.0)
    reps.restricted.df$bias.alpha.1 <- abs(reps.restricted.df$alpha.1 - alpha.1)
    reps.restricted.df$bias.beta.0 <- abs(reps.restricted.df$beta.0 - beta.0)
    reps.restricted.df$bias.beta.1 <- abs(reps.restricted.df$beta.1 - beta.1)
    reps.restricted.df$bias.beta.2 <- abs(reps.restricted.df$beta.2 - beta.2)
    reps.restricted.df$bias.beta.3 <- abs(reps.restricted.df$beta.3 - beta.3)
    reps.restricted.df$bias.beta.4 <- abs(reps.restricted.df$beta.4 - beta.4)
    reps.df$bias.alpha.0 <- abs(reps.df$alpha.0 - alpha.0)
    reps.df$bias.alpha.1 <- abs(reps.df$alpha.1 - alpha.1)
    reps.df$bias.beta.0 <- abs(reps.df$beta.0 - beta.0)
    reps.df$bias.beta.1 <- abs(reps.df$beta.1 - beta.1)
    reps.df$bias.beta.2 <- abs(reps.df$beta.2 - beta.2)
    reps.df$bias.beta.3 <- abs(reps.df$beta.3 - beta.3)
    reps.df$bias.beta.4 <- abs(reps.df$beta.4 - beta.4)
    reps.df$bias.beta.5 <- abs(reps.df$beta.5 - beta.5)
    reps.df$bias.beta.6 <- abs(reps.df$beta.6 - beta.6)
    reps.df$bias.beta.7 <- abs(reps.df$beta.7 - beta.7)
    
    ### export data
    exper.list[[k]] <- reps.df
    exper.noint.list[[k]] <- reps.noint.df
    exper.restricted.list[[k]] <- reps.restricted.df
    close(pb)
  }
  
# merge into final df
df <- data.frame(matrix(nrow=0,ncol=10))
colnames(df) <- c("lo", "hi", "mean", "median", "var", "nobs", "rho.x", "rho.z", "rho.y", "model")
# quantiles -- full model
for(i in 1:nrow(exper)){
  tmp <- exper.list[[i]]
  tmpdf <- data.frame(t(apply(tmp, 2, quantile,c(.025,.975), na.rm=T))); colnames(tmpdf) <- c("lo","hi")
  tmpdf$mean <- apply(tmp, 2, mean, na.rm=TRUE)
  tmpdf$median <- apply(tmp, 2, median, na.rm=TRUE)  
  tmpdf$var <- rownames(tmpdf); rownames(tmpdf) <- NULL
  tmpdf$nobs <- rep(exper$nobs[i],nrow(tmpdf))
  tmpdf$rho.x <- rep(exper$rho.x[i],nrow(tmpdf))
  tmpdf$rho.z <- rep(exper$rho.z[i],nrow(tmpdf))
  tmpdf$rho.y <- rep(exper$rho.y[i],nrow(tmpdf))
  tmpdf$model <- rep("full",nrow(tmpdf))
  for(j in 1:ncol(tmpdf)){
    tmpdf[,j] <- as.character(tmpdf[,j])
  }
  df <- rbind(df,tmpdf)
}

# quantiles -- no interaction model
for(i in 1:nrow(exper)){
  tmp <- exper.noint.list[[i]]
  tmpdf <- data.frame(t(apply(tmp, 2, quantile,c(.025,.975), na.rm=T))); colnames(tmpdf) <- c("lo","hi")
  tmpdf$mean <- apply(tmp, 2, mean, na.rm=TRUE)
  tmpdf$median <- apply(tmp, 2, median, na.rm=TRUE)  
  tmpdf$var <- rownames(tmpdf); rownames(tmpdf) <- NULL
  tmpdf$nobs <- rep(exper$nobs[i],nrow(tmpdf))
  tmpdf$rho.x <- rep(exper$rho.x[i],nrow(tmpdf))
  tmpdf$rho.z <- rep(exper$rho.z[i],nrow(tmpdf))
  tmpdf$rho.y <- rep(exper$rho.y[i],nrow(tmpdf))
  tmpdf$model <- rep("noint",nrow(tmpdf))
  for(j in 1:ncol(tmpdf)){
    tmpdf[,j] <- as.character(tmpdf[,j])
  }
  df <- rbind(df,tmpdf)
}

# quantiles -- restricted model
for(i in 1:nrow(exper)){
  tmp <- exper.restricted.list[[i]]
  tmpdf <- data.frame(t(apply(tmp, 2, quantile,c(.025,.975), na.rm=T))); colnames(tmpdf) <- c("lo","hi")
  tmpdf$mean <- apply(tmp, 2, mean, na.rm=TRUE)
  tmpdf$median <- apply(tmp, 2, median, na.rm=TRUE)  
  tmpdf$var <- rownames(tmpdf); rownames(tmpdf) <- NULL
  tmpdf$nobs <- rep(exper$nobs[i],nrow(tmpdf))
  tmpdf$rho.x <- rep(exper$rho.x[i],nrow(tmpdf))
  tmpdf$rho.z <- rep(exper$rho.z[i],nrow(tmpdf))
  tmpdf$rho.y <- rep(exper$rho.y[i],nrow(tmpdf))
  tmpdf$model <- rep("restricted",nrow(tmpdf))
  for(j in 1:ncol(tmpdf)){
    tmpdf[,j] <- as.character(tmpdf[,j])
  }
  df <- rbind(df,tmpdf)
}

# finally, store for later
df_no_interaction <- df



#reset the master info
exper <- expand.grid(nobs = nobs,
                     rho.x = rho.x,
                     rho.z = rho.z,
                     rho.y = rho.y)
exper.list <- as.list(NULL)
exper.noint.list <- as.list(NULL)
exper.restricted.list <- as.list(NULL)

# store estimates and such from model with no interaction
reps.noint.df <- data.frame(matrix(ncol = 37, nrow = reps))
names(reps.noint.df) <- c(paste0("alpha.", 0:1), paste0("beta.", 0:3), paste0("p.alpha.", 0:1), paste0("p.beta.", 0:3), paste0("ss.alpha.", 0:1),
							paste0("ss.beta.", 0:3), paste0("cov.alpha.", 0:1), paste0("cov.beta.", 0:3), paste0("var.alpha.", 0:1), 
							paste0("var.beta.", 0:3), paste0("bias.alpha.", 0:1), paste0("bias.beta.", 0:3), "RMSE")

# store estimates and such from model with only present-period interaction
reps.restricted.df <- data.frame(matrix(ncol = 43, nrow = reps))
names(reps.restricted.df) <- c(paste0("alpha.", 0:1), paste0("beta.", 0:4), paste0("p.alpha.", 0:1), paste0("p.beta.", 0:4), paste0("ss.alpha.", 0:1),
							paste0("ss.beta.", 0:4), paste0("cov.alpha.", 0:1), paste0("cov.beta.", 0:4), paste0("var.alpha.", 0:1), 
							paste0("var.beta.", 0:4), paste0("bias.alpha.", 0:1), paste0("bias.beta.", 0:4), "RMSE")

# store the estimates and such from the full model
reps.df <- data.frame(matrix(ncol = 61, nrow = reps))
names(reps.df) <- c(paste0("alpha.", 0:1), paste0("beta.", 0:7), paste0("p.alpha.", 0:1), paste0("p.beta.", 0:7), paste0("ss.alpha.", 0:1),
							paste0("ss.beta.", 0:7), paste0("cov.alpha.", 0:1), paste0("cov.beta.", 0:7), paste0("var.alpha.", 0:1), 
							paste0("var.beta.", 0:7), paste0("bias.alpha.", 0:1), paste0("bias.beta.", 0:7), "RMSE")

### Run the loop for case when there's a basic interaction present
for(k in 1:nrow(exper)){
  # Or, for parallel	
  # foreach(k = 1:nrow(exper)) %dopar% {
  message(paste("experiment", k, "of", nrow(exper), sep = " "))
  
  # old, patch -- 50% subset rather than 20%, still 50 obs
  rmse.vec <- sample(seq(1:reps), reps/2, replace = FALSE)
  #rmse.vec <- sample(seq(1:reps), reps/5, replace = F
  
  pb <- txtProgressBar(min = 0, max = reps, style = 3)
  for(j in 1:reps){
    
    ### set up data
    x <- arima.sim(list(ar = exper$rho.x[k]), exper$nobs[k], n.start = 100)
    z <- arima.sim(list(ar = exper$rho.z[k]), exper$nobs[k], n.start = 100)
    df <- data.frame(cbind(x, z))
    # create lags
    df <- ddply(df, .(), transform, l.x = c(NA, x[-length(x)]))
    df <- ddply(df, .(), transform, l.z = c(NA, z[-length(z)]))
    # create interactions
    df$xz <- df$x*df$z
    df$l.x_z <- df$l.x*df$z
    df$x_l.z <- df$x*df$l.z
    df$l.x_l.z <- df$l.x*df$l.z
    # create DV -- basic interaction term
    df$y <- rep(NA, nrow(df))
    df$y[1] <- rnorm(1) # randomly start
    for(i in 2:nrow(df)){
      df$y[i] <- alpha.0 + alpha.1*df$y[i-1] + beta.0*df$x[i] +
        beta.1*df$l.x[i] + beta.2*df$z[i] + beta.3*df$l.z[i] +
        beta.4*df$xz[i] + rnorm(1)
    }
    # create lag DV
    df <- ddply(df, .(), transform, l.y = c(NA, y[-length(y)]))
    # drop first obs so no NA
    df <- df[-1, ]
    
    ### estimate the models
    nointmod <- glm(y ~ l.y + x + l.x + z + l.z, data = df)
    restrictedmod <- glm(y ~ l.y + x + l.x + z + l.z + xz, data = df)
    genmod <- glm(y ~ l.y + x + l.x + z + l.z + xz + l.x_z + x_l.z + l.x_l.z,
                  data = df)
    
    ### save estimates and p values
    reps.noint.df[j, 1:12] <- matrix(c(summary(nointmod)$coeff[, 1],
                                summary(nointmod)$coeff[, 4]), nrow = 1)
    reps.restricted.df[j, 1:14] <- matrix( c(summary(restrictedmod)$coeff[, 1],
                                     summary(restrictedmod)$coeff[, 4]), nrow = 1)
    reps.df[j, 1:20] <- matrix( c(summary(genmod)$coeff[, 1],
                          summary(genmod)$coeff[, 4]), nrow = 1)
    
    ### save whether true value covered by confidence interval -- includes Mult Comp. correction
    ci.noint <- data.frame(suppressMessages(confint(nointmod)))
    names(ci.noint) <- c("lo", "hi")
    ci.restricted <- data.frame(suppressMessages(confint(restrictedmod)))
    names(ci.restricted) <- c("lo", "hi")
    ci.gen <- data.frame(suppressMessages(confint(genmod)))
    names(ci.gen) <- c("lo", "hi")
    ci.noint$true <- c(alpha.0,alpha.1,beta.0,beta.1,beta.2,beta.3)
    ci.restricted$true <- c(alpha.0,alpha.1,beta.0,beta.1,beta.2,beta.3,beta.4)
    ci.gen$true <- c(alpha.0,alpha.1,beta.0,beta.1,beta.2,beta.3,beta.4,0,0,0)      # b5 = b6 = b7 = 0 in this sim
    ci.noint$cov <- ifelse(ci.noint$lo < ci.noint$true &
                             ci.noint$hi > ci.noint$true, 1, 0)
    ci.restricted$cov <- ifelse(ci.restricted$lo < ci.restricted$true &
                                  ci.restricted$hi > ci.restricted$true, 1, 0)
    ci.gen$cov <- ifelse(ci.gen$lo < ci.gen$true & ci.gen$hi > ci.gen$true, 1, 0)
    reps.noint.df[j, c(19:24)] <- ci.noint$cov
    reps.restricted.df[j, c(22:28)] <- ci.restricted$cov
    reps.df[j, c(31:40)] <- ci.gen$cov
    
    ### calculate RMSE from 5-fold cross-val -- for computational reasons, take a random 50% subset
    if((j %in% rmse.vec) == T){
      tc <- trainControl("cv", 5)
      reps.noint.df[j,ncol(reps.noint.df)] <-
        train(y ~ l.y + x + l.x + z + l.z,
              data = df, method = "glm", trControl = tc)$results$RMSE
      reps.restricted.df[j, ncol(reps.restricted.df)] <-
        train(y ~ l.y + x + l.x + z + l.z + xz,
              data = df, method = "glm", trControl = tc)$results$RMSE
      reps.df[j, ncol(reps.df)] <-
        train(y ~ l.y + x + l.x + z + l.z + xz + l.x_z + x_l.z + l.x_l.z,
              data = df, method = "glm", trControl = tc)$results$RMSE
    }
    # update progress bar
    setTxtProgressBar(pb, j)
  }
  
  ### significance of the ECM stat, E+M critical values
  reps.noint.df[, 14] <- ifelse(abs(summary(nointmod)$coeff[2, 3]) >
                                  abs(-3.33613 + ((-6.1101)/exper$nobs[k]) +
                                        ((-6.823)/(exper$nobs[k])^2)), 1, 0) #for 2 var, constant, 5%
  reps.restricted.df[, 16] <- ifelse(abs(summary(restrictedmod)$coeff[2, 3]) >
                                       abs(-3.74066 + ((-8.5631)/exper$nobs[k]) +
                                             ((-10.852)/(exper$nobs[k])^2) +
                                             ((27.982)/(exper$nobs[k])^3)), 1, 0) #for 3 var, constant, 5%
  reps.df[, 22] <- ifelse(abs(summary(genmod)$coeff[2, 3]) >
                            abs(-3.74066 + ((-8.5631)/exper$nobs[k]) +
                                  ((-10.852)/(exper$nobs[k])^2) +
                                  ((27.982)/(exper$nobs[k])^3)), 1, 0) #for 3 var, constant, 5%
  
  ### save whether stat. sig., except alpha_1
  for(i in c(13, 15:18)){
    reps.noint.df[, i] <- ifelse(reps.noint.df[, (i - 6)] < .05, 1, 0)
  }
  for(i in c(15, 17:21)){
    reps.restricted.df[, i] <- ifelse(reps.restricted.df[, (i - 7)] < .05, 1, 0)
  }
  for(i in c(21, 23:30)){
    reps.df[, i] <- ifelse(reps.df[, (i - 10)] < .05, 1, 0)
  }
  
  ### save MC variance
  for(i in 25:30){
    reps.noint.df[, i] <- rep(var(reps.noint.df[, (i - 24)]), reps)
  } 
  for(i in 29:35){
    reps.restricted.df[, i] <- rep(var(reps.restricted.df[, (i - 28)]), reps)
  }
  for(i in 41:50){
    reps.df[, i] <- rep(var(reps.df[, (i - 40)]), reps)
  }
  
  ### save bias
  reps.noint.df$bias.alpha.0 <- abs(reps.noint.df$alpha.0 - alpha.0)
  reps.noint.df$bias.alpha.1 <- abs(reps.noint.df$alpha.1 - alpha.1)
  reps.noint.df$bias.beta.0 <- abs(reps.noint.df$beta.0 - beta.0)
  reps.noint.df$bias.beta.1 <- abs(reps.noint.df$beta.1 - beta.1)
  reps.noint.df$bias.beta.2 <- abs(reps.noint.df$beta.2 - beta.2)
  reps.noint.df$bias.beta.3 <- abs(reps.noint.df$beta.3 - beta.3)
  reps.restricted.df$bias.alpha.0 <- abs(reps.restricted.df$alpha.0 - alpha.0)
  reps.restricted.df$bias.alpha.1 <- abs(reps.restricted.df$alpha.1 - alpha.1)
  reps.restricted.df$bias.beta.0 <- abs(reps.restricted.df$beta.0 - beta.0)
  reps.restricted.df$bias.beta.1 <- abs(reps.restricted.df$beta.1 - beta.1)
  reps.restricted.df$bias.beta.2 <- abs(reps.restricted.df$beta.2 - beta.2)
  reps.restricted.df$bias.beta.3 <- abs(reps.restricted.df$beta.3 - beta.3)
  reps.restricted.df$bias.beta.4 <- abs(reps.restricted.df$beta.4 - beta.4)
  reps.df$bias.alpha.0 <- abs(reps.df$alpha.0 - alpha.0)
  reps.df$bias.alpha.1 <- abs(reps.df$alpha.1 - alpha.1)
  reps.df$bias.beta.0 <- abs(reps.df$beta.0 - beta.0)
  reps.df$bias.beta.1 <- abs(reps.df$beta.1 - beta.1)
  reps.df$bias.beta.2 <- abs(reps.df$beta.2 - beta.2)
  reps.df$bias.beta.3 <- abs(reps.df$beta.3 - beta.3)
  reps.df$bias.beta.4 <- abs(reps.df$beta.4 - beta.4)
  reps.df$bias.beta.5 <- abs(reps.df$beta.5 - beta.5)
  reps.df$bias.beta.6 <- abs(reps.df$beta.6 - beta.6)
  reps.df$bias.beta.7 <- abs(reps.df$beta.7 - beta.7)
  
  ### export data
  exper.list[[k]] <- reps.df
  exper.noint.list[[k]] <- reps.noint.df
  exper.restricted.list[[k]] <- reps.restricted.df
  close(pb)
}

# merge into final df
df <- data.frame(matrix(nrow = 0, ncol = 10))

names(df) <- c('lo', 'hi', "mean", "median", "var", "nobs", "rho.x", "rho.z", "rho.y", "model")

# quantiles -- full model
for(i in 1:nrow(exper)){
  tmp <- exper.list[[i]]
  tmpdf <- data.frame(t(apply(tmp, 2, quantile, c(.025, .975), na.rm = T)))
  names(tmpdf) <- c("lo","hi")
  tmpdf$mean <- apply(tmp, 2, mean, na.rm=TRUE)
  tmpdf$median <- apply(tmp, 2, median, na.rm=TRUE)  
  tmpdf$var <- rownames(tmpdf); rownames(tmpdf) <- NULL
  tmpdf$nobs <- rep(exper$nobs[i], nrow(tmpdf))
  tmpdf$rho.x <- rep(exper$rho.x[i], nrow(tmpdf))
  tmpdf$rho.z <- rep(exper$rho.z[i], nrow(tmpdf))
  tmpdf$rho.y <- rep(exper$rho.y[i], nrow(tmpdf))
  tmpdf$model <- rep("full", nrow(tmpdf))
  for(j in 1:ncol(tmpdf)){
    tmpdf[, j] <- as.character(tmpdf[, j])
  }
  df <- rbind(df, tmpdf)
}
# quantiles -- no interaction model
for(i in 1:nrow(exper)){
  tmp <- exper.noint.list[[i]]
  tmpdf <- data.frame(t(apply(tmp, 2, quantile, c(.025, .975), na.rm = T)))
  names(tmpdf) <- c("lo","hi")
  tmpdf$mean <- apply(tmp, 2, mean, na.rm=TRUE)
  tmpdf$median <- apply(tmp, 2, median, na.rm=TRUE)  
  tmpdf$var <- rownames(tmpdf); rownames(tmpdf) <- NULL
  tmpdf$nobs <- rep(exper$nobs[i], nrow(tmpdf))
  tmpdf$rho.x <- rep(exper$rho.x[i], nrow(tmpdf))
  tmpdf$rho.z <- rep(exper$rho.z[i], nrow(tmpdf))
  tmpdf$rho.y <- rep(exper$rho.y[i], nrow(tmpdf))
  tmpdf$model <- rep("noint", nrow(tmpdf))
  for(j in 1:ncol(tmpdf)){
    tmpdf[, j] <- as.character(tmpdf[, j])
  }
  df <- rbind(df, tmpdf)
}
# quantiles -- restricted interaction model
for(i in 1:nrow(exper)){
  tmp <- exper.restricted.list[[i]]
  tmpdf <- data.frame(t(apply(tmp, 2, quantile, c(.025,.975), na.rm = T)))
  names(tmpdf) <- c("lo","hi")
  tmpdf$mean <- apply(tmp, 2, mean, na.rm=TRUE)
  tmpdf$median <- apply(tmp, 2, median, na.rm=TRUE)  
  tmpdf$var <- rownames(tmpdf); rownames(tmpdf) <- NULL
  tmpdf$nobs <- rep(exper$nobs[i], nrow(tmpdf))
  tmpdf$rho.x <- rep(exper$rho.x[i], nrow(tmpdf))
  tmpdf$rho.z <- rep(exper$rho.z[i], nrow(tmpdf))
  tmpdf$rho.y <- rep(exper$rho.y[i], nrow(tmpdf))
  tmpdf$model <- rep("restricted", nrow(tmpdf))
  for(j in 1:ncol(tmpdf)){
    tmpdf[, j] <- as.character(tmpdf[, j])
  }
  df <- rbind(df, tmpdf)
}
# finally, store for later
df_restricted <- df




#reset the master info
exper <- expand.grid(nobs = nobs,
                     rho.x = rho.x,
                     rho.z = rho.z,
                     rho.y = rho.y)
exper.list <- as.list(NULL)
exper.noint.list <- as.list(NULL)
exper.restricted.list <- as.list(NULL)

# store estimates and such from model with no interaction
reps.noint.df <- data.frame(matrix(ncol = 37, nrow = reps))
names(reps.noint.df) <- c(paste0("alpha.", 0:1), paste0("beta.", 0:3), paste0("p.alpha.", 0:1), paste0("p.beta.", 0:3), paste0("ss.alpha.", 0:1),
							paste0("ss.beta.", 0:3), paste0("cov.alpha.", 0:1), paste0("cov.beta.", 0:3), paste0("var.alpha.", 0:1), 
							paste0("var.beta.", 0:3), paste0("bias.alpha.", 0:1), paste0("bias.beta.", 0:3), "RMSE")

# store estimates and such from model with only present-period interaction
reps.restricted.df <- data.frame(matrix(ncol = 43, nrow = reps))
names(reps.restricted.df) <- c(paste0("alpha.", 0:1), paste0("beta.", 0:4), paste0("p.alpha.", 0:1), paste0("p.beta.", 0:4), paste0("ss.alpha.", 0:1),
							paste0("ss.beta.", 0:4), paste0("cov.alpha.", 0:1), paste0("cov.beta.", 0:4), paste0("var.alpha.", 0:1), 
							paste0("var.beta.", 0:4), paste0("bias.alpha.", 0:1), paste0("bias.beta.", 0:4), "RMSE")

# store the estimates and such from the full model
reps.df <- data.frame(matrix(ncol = 61, nrow = reps))
names(reps.df) <- c(paste0("alpha.", 0:1), paste0("beta.", 0:7), paste0("p.alpha.", 0:1), paste0("p.beta.", 0:7), paste0("ss.alpha.", 0:1),
							paste0("ss.beta.", 0:7), paste0("cov.alpha.", 0:1), paste0("cov.beta.", 0:7), paste0("var.alpha.", 0:1), 
							paste0("var.beta.", 0:7), paste0("bias.alpha.", 0:1), paste0("bias.beta.", 0:7), "RMSE")


### Run the loop for case when there's a full interaction
for(k in 1:nrow(exper)){
  # Or, for parallel	
  # foreach(k = 1:nrow(exper)) %dopar% {
  message(paste("experiment", k, "of", nrow(exper), sep = " "))
  
  # old, patch -- 50% subset rather than 20%, still 50 obs
  rmse.vec <- sample(seq(1:reps), reps/2, replace = FALSE)
  #rmse.vec <- sample(seq(1:reps), reps/5, replace = F
  
  pb <- txtProgressBar(min = 0, max = reps, style = 3)
  for(j in 1:reps){
    
    ### set up data
    x <- arima.sim(list(ar = exper$rho.x[k]), exper$nobs[k], n.start = 100)
    z <- arima.sim(list(ar = exper$rho.z[k]), exper$nobs[k], n.start = 100)
    df <- data.frame(cbind(x, z))
    # create lags
    df <- ddply(df, .(), transform, l.x = c(NA, x[-length(x)]))
    df <- ddply(df, .(), transform, l.z = c(NA, z[-length(z)]))
    # create interactions
    df$xz <- df$x*df$z
    df$l.x_z <- df$l.x*df$z
    df$x_l.z <- df$x*df$l.z
    df$l.x_l.z <- df$l.x*df$l.z
    # create DV -- full interaction
    df$y <- rep(NA, nrow(df))
    df$y[1] <- rnorm(1) # randomly start
    for(i in 2:nrow(df)){
      df$y[i] <- alpha.0 + alpha.1*df$y[i-1] + beta.0*df$x[i] +
        beta.1*df$l.x[i] + beta.2*df$z[i] + beta.3*df$l.z[i] +
        beta.4*df$xz[i] + beta.5*df$l.x_z[i] + beta.6*df$x_l.z[i] +
        beta.7*df$l.x_l.z[i] + rnorm(1)
    }
    # create lag DV
    df <- ddply(df, .(), transform, l.y = c(NA, y[-length(y)]))
    # drop first obs so no NA
    df <- df[-1, ]
    
    ### estimate the models
    nointmod <- glm(y ~ l.y + x + l.x + z + l.z, data = df)
    restrictedmod <- glm(y ~ l.y + x + l.x + z + l.z + xz, data = df)
    genmod <- glm(y ~ l.y + x + l.x + z + l.z + xz + l.x_z + x_l.z + l.x_l.z,
                  data = df)
    
    ### save estimates and p values
    reps.noint.df[j, 1:12] <- matrix(c(summary(nointmod)$coeff[, 1],
                                summary(nointmod)$coeff[, 4]), nrow = 1)
    reps.restricted.df[j, 1:14] <- matrix( c(summary(restrictedmod)$coeff[, 1],
                                     summary(restrictedmod)$coeff[, 4]), nrow = 1)
    reps.df[j, 1:20] <- matrix( c(summary(genmod)$coeff[, 1],
                          summary(genmod)$coeff[, 4]), nrow = 1)
    
    ### save whether true value covered by confidence interval -- includes Mult Comp. correction
    ci.noint <- data.frame(suppressMessages(confint(nointmod)))
    names(ci.noint) <- c("lo", "hi")
    ci.restricted <- data.frame(suppressMessages(confint(restrictedmod)))
    names(ci.restricted) <- c("lo", "hi")
    ci.gen <- data.frame(suppressMessages(confint(genmod)))
    names(ci.gen) <- c("lo", "hi")
    ci.noint$true <- c(alpha.0,alpha.1,beta.0,beta.1,beta.2,beta.3)
    ci.restricted$true <- c(alpha.0,alpha.1,beta.0,beta.1,beta.2,beta.3,beta.4)
    ci.gen$true <- c(alpha.0,alpha.1,beta.0,beta.1,beta.2,beta.3,beta.4,beta.5,
                     beta.6,beta.7)
    ci.noint$cov <- ifelse(ci.noint$lo < ci.noint$true &
                             ci.noint$hi > ci.noint$true, 1, 0)
    ci.restricted$cov <- ifelse(ci.restricted$lo < ci.restricted$true &
                                  ci.restricted$hi > ci.restricted$true, 1, 0)
    ci.gen$cov <- ifelse(ci.gen$lo < ci.gen$true & ci.gen$hi > ci.gen$true, 1, 0)
    reps.noint.df[j, c(19:24)] <- ci.noint$cov
    reps.restricted.df[j, c(22:28)] <- ci.restricted$cov
    reps.df[j, c(31:40)] <- ci.gen$cov
    
    ### calculate RMSE from 5-fold cross-val -- for computational reasons, take a random 50% subset
    if((j %in% rmse.vec) == T){
      tc <- trainControl("cv", 5)
      reps.noint.df[j,ncol(reps.noint.df)] <-
        train(y ~ l.y + x + l.x + z + l.z,
              data = df, method = "glm", trControl = tc)$results$RMSE
      reps.restricted.df[j, ncol(reps.restricted.df)] <-
        train(y ~ l.y + x + l.x + z + l.z + xz,
              data = df, method = "glm", trControl = tc)$results$RMSE
      reps.df[j, ncol(reps.df)] <-
        train(y ~ l.y + x + l.x + z + l.z + xz + l.x_z + x_l.z + l.x_l.z,
              data = df, method = "glm", trControl = tc)$results$RMSE
    }
    # update progress bar
    setTxtProgressBar(pb, j)
  }
  
  ### significance of the ECM stat, E+M critical values
  reps.noint.df[, 14] <- ifelse(abs(summary(nointmod)$coeff[2, 3]) >
                                  abs(-3.33613 + ((-6.1101)/exper$nobs[k]) +
                                        ((-6.823)/(exper$nobs[k])^2)), 1, 0) #for 2 var, constant, 5%
  reps.restricted.df[, 16] <- ifelse(abs(summary(restrictedmod)$coeff[2, 3]) >
                                       abs(-3.74066 + ((-8.5631)/exper$nobs[k]) +
                                             ((-10.852)/(exper$nobs[k])^2) +
                                             ((27.982)/(exper$nobs[k])^3)), 1, 0) #for 3 var, constant, 5%
  reps.df[, 22] <- ifelse(abs(summary(genmod)$coeff[2, 3]) >
                            abs(-3.74066 + ((-8.5631)/exper$nobs[k]) +
                                  ((-10.852)/(exper$nobs[k])^2) +
                                  ((27.982)/(exper$nobs[k])^3)), 1, 0) #for 3 var, constant, 5%
  
  ### save whether stat. sig., except alpha_1
  for(i in c(13, 15:18)){
    reps.noint.df[, i] <- ifelse(reps.noint.df[, (i - 6)] < .05, 1, 0)
  }
  for(i in c(15, 17:21)){
    reps.restricted.df[, i] <- ifelse(reps.restricted.df[, (i - 7)] < .05, 1, 0)
  }
  for(i in c(21, 23:30)){
    reps.df[, i] <- ifelse(reps.df[, (i - 10)] < .05, 1, 0)
  }
  
  ### save MC variance
  for(i in 25:30){
    reps.noint.df[, i] <- rep(var(reps.noint.df[, (i - 24)]), reps)
  } 
  for(i in 29:35){
    reps.restricted.df[, i] <- rep(var(reps.restricted.df[, (i - 28)]), reps)
  }
  for(i in 41:50){
    reps.df[, i] <- rep(var(reps.df[, (i - 40)]), reps)
  }
  
  ### save bias
  reps.noint.df$bias.alpha.0 <- abs(reps.noint.df$alpha.0 - alpha.0)
  reps.noint.df$bias.alpha.1 <- abs(reps.noint.df$alpha.1 - alpha.1)
  reps.noint.df$bias.beta.0 <- abs(reps.noint.df$beta.0 - beta.0)
  reps.noint.df$bias.beta.1 <- abs(reps.noint.df$beta.1 - beta.1)
  reps.noint.df$bias.beta.2 <- abs(reps.noint.df$beta.2 - beta.2)
  reps.noint.df$bias.beta.3 <- abs(reps.noint.df$beta.3 - beta.3)
  reps.restricted.df$bias.alpha.0 <- abs(reps.restricted.df$alpha.0 - alpha.0)
  reps.restricted.df$bias.alpha.1 <- abs(reps.restricted.df$alpha.1 - alpha.1)
  reps.restricted.df$bias.beta.0 <- abs(reps.restricted.df$beta.0 - beta.0)
  reps.restricted.df$bias.beta.1 <- abs(reps.restricted.df$beta.1 - beta.1)
  reps.restricted.df$bias.beta.2 <- abs(reps.restricted.df$beta.2 - beta.2)
  reps.restricted.df$bias.beta.3 <- abs(reps.restricted.df$beta.3 - beta.3)
  reps.restricted.df$bias.beta.4 <- abs(reps.restricted.df$beta.4 - beta.4)
  reps.df$bias.alpha.0 <- abs(reps.df$alpha.0 - alpha.0)
  reps.df$bias.alpha.1 <- abs(reps.df$alpha.1 - alpha.1)
  reps.df$bias.beta.0 <- abs(reps.df$beta.0 - beta.0)
  reps.df$bias.beta.1 <- abs(reps.df$beta.1 - beta.1)
  reps.df$bias.beta.2 <- abs(reps.df$beta.2 - beta.2)
  reps.df$bias.beta.3 <- abs(reps.df$beta.3 - beta.3)
  reps.df$bias.beta.4 <- abs(reps.df$beta.4 - beta.4)
  reps.df$bias.beta.5 <- abs(reps.df$beta.5 - beta.5)
  reps.df$bias.beta.6 <- abs(reps.df$beta.6 - beta.6)
  reps.df$bias.beta.7 <- abs(reps.df$beta.7 - beta.7)
  
  ### export data
  exper.list[[k]] <- reps.df
  exper.noint.list[[k]] <- reps.noint.df
  exper.restricted.list[[k]] <- reps.restricted.df
  close(pb)
}

# merge into final df
df <- data.frame(matrix(nrow = 0, ncol = 10))

names(df) <- c('lo', 'hi', "mean", "median", "var", "nobs", "rho.x", "rho.z", "rho.y", "model")

# quantiles -- full model
for(i in 1:nrow(exper)){
  tmp <- exper.list[[i]]
  tmpdf <- data.frame(t(apply(tmp, 2, quantile, c(.025, .975), na.rm = T)))
  names(tmpdf) <- c("lo","hi")
  tmpdf$mean <- apply(tmp, 2, mean, na.rm=TRUE)
  tmpdf$median <- apply(tmp, 2, median, na.rm=TRUE)  
  tmpdf$var <- rownames(tmpdf); rownames(tmpdf) <- NULL
  tmpdf$nobs <- rep(exper$nobs[i], nrow(tmpdf))
  tmpdf$rho.x <- rep(exper$rho.x[i], nrow(tmpdf))
  tmpdf$rho.z <- rep(exper$rho.z[i], nrow(tmpdf))
  tmpdf$rho.y <- rep(exper$rho.y[i], nrow(tmpdf))
  tmpdf$model <- rep("full", nrow(tmpdf))
  for(j in 1:ncol(tmpdf)){
    tmpdf[, j] <- as.character(tmpdf[, j])
  }
  df <- rbind(df, tmpdf)
}
# quantiles -- no interaction model
for(i in 1:nrow(exper)){
  tmp <- exper.noint.list[[i]]
  tmpdf <- data.frame(t(apply(tmp, 2, quantile, c(.025, .975), na.rm = T)))
  names(tmpdf) <- c("lo","hi")
  tmpdf$mean <- apply(tmp, 2, mean, na.rm=TRUE)
  tmpdf$median <- apply(tmp, 2, median, na.rm=TRUE)  
  tmpdf$var <- rownames(tmpdf); rownames(tmpdf) <- NULL
  tmpdf$nobs <- rep(exper$nobs[i], nrow(tmpdf))
  tmpdf$rho.x <- rep(exper$rho.x[i], nrow(tmpdf))
  tmpdf$rho.z <- rep(exper$rho.z[i], nrow(tmpdf))
  tmpdf$rho.y <- rep(exper$rho.y[i], nrow(tmpdf))
  tmpdf$model <- rep("noint", nrow(tmpdf))
  for(j in 1:ncol(tmpdf)){
    tmpdf[, j] <- as.character(tmpdf[, j])
  }
  df <- rbind(df, tmpdf)
}
# quantiles -- restricted interaction model
for(i in 1:nrow(exper)){
  tmp <- exper.restricted.list[[i]]
  tmpdf <- data.frame(t(apply(tmp, 2, quantile, c(.025,.975), na.rm = T)))
  names(tmpdf) <- c("lo","hi")
  tmpdf$mean <- apply(tmp, 2, mean, na.rm=TRUE)
  tmpdf$median <- apply(tmp, 2, median, na.rm=TRUE)  
  tmpdf$var <- rownames(tmpdf); rownames(tmpdf) <- NULL
  tmpdf$nobs <- rep(exper$nobs[i], nrow(tmpdf))
  tmpdf$rho.x <- rep(exper$rho.x[i], nrow(tmpdf))
  tmpdf$rho.z <- rep(exper$rho.z[i], nrow(tmpdf))
  tmpdf$rho.y <- rep(exper$rho.y[i], nrow(tmpdf))
  tmpdf$model <- rep("restricted", nrow(tmpdf))
  for(j in 1:ncol(tmpdf)){
    tmpdf[, j] <- as.character(tmpdf[, j])
  }
  df <- rbind(df, tmpdf)
}
# finally, store for later
df_full_interaction <- df



# merge all together
df_full_interaction$dgp <- rep("full_interaction", nrow(df_full_interaction))
df_restricted$dgp <- rep("restricted", nrow(df_restricted))
df_no_interaction$dgp <- rep("no_interaction", nrow(df_no_interaction))
df_spurious$dgp <- rep("spurious", nrow(df_spurious))
df <- rbind(df_spurious, df_no_interaction, df_restricted, df_full_interaction) # 6:32 pm, Friday

write.csv(df, "WVKJ-MC-simulations.csv", row.names = F)

